rm(list = ls())
library(tidyverse)
library(foreign)


# Load the imputed voter file and restrict it to general-election voters:
# every record whose Y2020_003_Vote code equals "NV" is removed. Records with
# a missing Y2020_003_Vote are removed too, because filter() only keeps rows
# for which the condition is TRUE.
vf_imp <- read_csv("voter_file_hist_all_imp.csv") %>%
  filter(Y2020_003_Vote != "NV")

# Build the "general-election reclassified" voter file.
#
# For each prediction column listed in `gen_reclass_source_cols`, a companion
# column named `<column>_gen_reclass` is created that is
#   * 0 when Y2020_001_party == "D", and
#   * the original prediction otherwise (including when Y2020_001_party is NA,
#     since an NA comparison falls through to the default branch).
# The original prediction columns are then dropped, so only the reclassified
# versions remain in `vf_imp_gen_reclass`.
#
# The recode is written once and applied with across(), so every output column
# is guaranteed to be derived from its own source column; a hand-copied
# per-column recode is easy to mis-wire to a neighbouring column.
# The order of this vector fixes the order of the new columns.
gen_reclass_source_cols <- c(
  "pres_app_bin_ga_extend",
  "pres_app_bin_ga_simp",
  "pres_app_bin_ga_rv_extend",
  "pres_app_bin_ga_rv_simp",
  "pres_app_bin_ga_rv_method",
  "vote_trump_biden_ga_rv_extend",
  "pres_app_bin_ga_method",
  "vote_trump_biden_ga_extend",
  "vote_trump_biden_ga_rv_method",
  "vote_trump_biden_ga_method",
  "trump_index_naive_ga_rv_extend",
  "trump_index_naive_ga_rv_method",
  "trump_index_naive_ga_extend",
  "trump_index_naive_ga_method",
  "vote_trump_biden_ga_rv_simp",
  "vote_trump_biden_ga_simp",
  "pres_app_num_ga_rv_extend",
  "trump_index_naive_ga_rv_simp",
  "pres_app_num_ga_extend",
  "pres_app_num_ga_rv_method",
  "trump_index_naive_ga_simp",
  "pres_app_num_ga_method",
  "trump_index_irt_ga_rv_extend",
  "trump_index_irt_ga_rv_method",
  "trump_index_irt_ga_method",
  "trump_index_irt_ga_extend",
  "pres_app_num_ga_simp",
  "pres_app_num_ga_rv_simp",
  "trump_index_irt_ga_rv_simp",
  "trump_index_irt_ga_simp"
)

vf_imp_gen_reclass <- vf_imp %>%
  # Same general-election filter as applied to vf_imp; repeating it is
  # harmless and keeps this step correct on an unfiltered vf_imp as well.
  filter(Y2020_003_Vote != "NV") %>%
  mutate(
    across(
      all_of(gen_reclass_source_cols),
      ~ case_when(
        Y2020_001_party == "D" ~ 0,
        TRUE ~ .x
      ),
      .names = "{.col}_gen_reclass"
    )
  ) %>%
  select(-all_of(gen_reclass_source_cols))
  

# State-level predictions
# Statewide mean of the pres_app_bin_ga_rv_extend prediction, before and after
# the general-election reclassification. No na.rm is applied, so a single NA
# in a column makes the corresponding mean NA.
mu_hat_TS <- mean(vf_imp[["pres_app_bin_ga_rv_extend"]])
mu_hat_TS_gen_reclass <- mean(
  vf_imp_gen_reclass[["pres_app_bin_ga_rv_extend_gen_reclass"]]
)
# Target: Trump's realized two-party vote share,
# 2,461,854 / (2,461,854 + 2,473,633) = 0.4988067

## Precinct Plots
# Load the Georgia precinct ID / precinct name crosswalk and clean it:
#   * rows without a county name are dropped;
#   * two DEKALB precinct IDs are corrected ("WC" -> "CN" for COAN RECREATION
#     CENTER, "CX" -> "CZ" for CHAMBLEE 2);
#   * every PAULDING precinct ID gets a leading "0".
# The three recodes touch disjoint sets of rows, so they are expressed as
# branches of a single case_when(); all other rows keep their PRECINCT_I.
precinct_results_2020 <- read_csv("2020_Precinct_Results.csv") %>%
  filter(!is.na(CTYNAME)) %>%
  mutate(
    PRECINCT_I = case_when(
      CTYNAME == "DEKALB" &
        PRECINCT_N == "COAN RECREATION CENTER" &
        PRECINCT_I == "WC" ~ "CN",
      CTYNAME == "DEKALB" &
        PRECINCT_N == "CHAMBLEE 2" &
        PRECINCT_I == "CX" ~ "CZ",
      CTYNAME == "PAULDING" ~ paste0("0", PRECINCT_I),
      TRUE ~ PRECINCT_I
    )
  )
  

# Merge county codes
# Attach the county lookup table (keyed on COUNTY_CODE) to both voter files.
ga_county_codes <- read_csv("GA_county_IDs.csv")
vf_imp <- vf_imp %>%
  left_join(ga_county_codes, by = "COUNTY_CODE")
vf_imp_gen_reclass <- vf_imp_gen_reclass %>%
  left_join(ga_county_codes, by = "COUNTY_CODE")

# Attach the same lookup to the precinct crosswalk (keyed on CTYNAME, keeping
# every crosswalk row), rename the precinct ID column, and prefix the precinct
# ID onto the two CHATHAM "ELI WHITNEY COMPLEX" precinct names.
precinct_results_2020_cocodes <- precinct_results_2020 %>%
  merge(ga_county_codes, by = "CTYNAME", all.x = TRUE) %>%
  rename(COUNTY_PRECINCT_ID = PRECINCT_I) %>%
  mutate(
    PRECINCT_N = case_when(
      CTYNAME == "CHATHAM" &
        PRECINCT_N == "ELI WHITNEY COMPLEX" &
        COUNTY_PRECINCT_ID == "2-06C" ~ "2-06C ELI WHITNEY COMPLEX",
      CTYNAME == "CHATHAM" &
        PRECINCT_N == "ELI WHITNEY COMPLEX" &
        COUNTY_PRECINCT_ID == "3-15C" ~ "3-15C ELI WHITNEY COMPLEX",
      TRUE ~ PRECINCT_N
    )
  )

# Read the precinct-level vote shares from the SOS Stata file
precinct_results_2020_sos <- read.dta("General_2020_GIS_Merge.dta")

# Full outer merge of the crosswalk with the SOS results on county name and
# precinct name (rows unmatched on either side are kept), then keep only the
# county, the precinct ID and the vote share, renamed to pr_trump_2pshare.
precinct_level_2020_outcomes <- precinct_results_2020_cocodes %>%
  merge(
    precinct_results_2020_sos,
    by = c("CTYNAME", "PRECINCT_N"),
    all = TRUE
  ) %>%
  select(CTYNAME, COUNTY_PRECINCT_ID, pr_trump_2pshare = trump_2pvs)

# Precinct-level estimates: within each county/precinct of the voter file,
# average every column whose name matches the "_ga_" pattern (NAs ignored).
vf_est <- vf_imp %>%
  group_by(CTYNAME, COUNTY_PRECINCT_ID) %>%
  summarize(
    across(matches("(_ga_)|(_ga_rv_)"), ~ mean(.x, na.rm = TRUE))
  )

# Attach the estimates to the precinct-level outcomes.
precinct_est <- precinct_level_2020_outcomes %>%
  left_join(vf_est, by = c("CTYNAME", "COUNTY_PRECINCT_ID"))

# Mean squared error of each estimate against pr_trump_2pshare across
# precincts, printed as a model/mse table sorted from best to worst.
# Each matched column is overwritten with its (constant) MSE, only those
# columns are kept, and distinct() collapses the identical rows.
precinct_est %>%
  mutate(
    across(
      matches("(_ga_)|(_ga_rv_)"),
      ~ mean((pr_trump_2pshare - .x)^2, na.rm = TRUE)
    ),
    .keep = "none"
  ) %>%
  distinct() %>%
  pivot_longer(cols = everything(), names_to = "model", values_to = "mse") %>%
  arrange(mse) %>%
  as.data.frame()

# Precinct-level regression of the realized share on one estimate.
lm(pr_trump_2pshare ~ pres_app_bin_ga_rv_extend, data = precinct_est) %>%
  summary()




# Same precinct-level pipeline, run on the reclassified voter file: average
# every column whose name matches the "_ga_" pattern within each
# county/precinct (NAs ignored).
vf_est_gen_reclass <- vf_imp_gen_reclass %>%
  group_by(CTYNAME, COUNTY_PRECINCT_ID) %>%
  summarize(
    across(matches("(_ga_)|(_ga_rv_)"), ~ mean(.x, na.rm = TRUE))
  )

# Attach the reclassified estimates to the precinct-level outcomes.
precinct_est_gen_reclass <- precinct_level_2020_outcomes %>%
  left_join(vf_est_gen_reclass, by = c("CTYNAME", "COUNTY_PRECINCT_ID"))

# Mean squared error of each reclassified estimate against pr_trump_2pshare,
# printed as a model/mse table sorted from best to worst.
precinct_est_gen_reclass %>%
  mutate(
    across(
      matches("(_ga_)|(_ga_rv_)"),
      ~ mean((pr_trump_2pshare - .x)^2, na.rm = TRUE)
    ),
    .keep = "none"
  ) %>%
  distinct() %>%
  pivot_longer(cols = everything(), names_to = "model", values_to = "mse") %>%
  arrange(mse) %>%
  as.data.frame()

# Precinct-level regression of the realized share on the reclassified estimate.
lm(
  pr_trump_2pshare ~ pres_app_bin_ga_rv_extend_gen_reclass,
  data = precinct_est_gen_reclass
) %>%
  summary()

# Text of the form "hat(mu) ==  <value>" built from the reclassified statewide
# mean rounded to 4 digits. It is not used anywhere else in the visible script.
temp <- paste(expression(hat(mu)), "== ", round(mu_hat_TS_gen_reclass, digits = 4))

# Assemble the export table: one row per row of precinct_est, carrying the
# original and reclassified precinct estimates, the realized two-party share,
# the precinct identifiers and the two statewide means.
#
# The reclassified estimate is joined from vf_est_gen_reclass, which has
# exactly one row per (CTYNAME, COUNTY_PRECINCT_ID). Joining precinct_est to
# precinct_est_gen_reclass instead would multiply rows whenever a key repeats
# in the outcome table (for example several unmatched rows of one county that
# all have a missing COUNTY_PRECINCT_ID), because both tables share those
# repeated keys. For unique keys the result is the same either way.
export <- precinct_est %>%
  left_join(
    vf_est_gen_reclass %>%
      ungroup() %>%
      select(
        CTYNAME,
        COUNTY_PRECINCT_ID,
        pres_app_bin_ga_rv_extend_gen_reclass
      ),
    by = c("CTYNAME", "COUNTY_PRECINCT_ID")
  ) %>%
  select(
    pres_app_bin_ga_rv_extend,
    pres_app_bin_ga_rv_extend_gen_reclass,
    pr_trump_2pshare,
    CTYNAME,
    COUNTY_PRECINCT_ID
  ) %>%
  mutate(
    statewide_mean = mu_hat_TS,
    statewide_reclass_mean = mu_hat_TS_gen_reclass
  ) %>%
  # Keep only precincts with an observed two-party share.
  filter(!is.na(pr_trump_2pshare))

write_csv(export, "Prec_OOS.csv")